# Directory that holds every per-sample divergent sites table
nahr_dir <- "/Volumes/lab_kingsley/ambenj/myosin_dups/analysis/nahr_analysis"

# Per-sample divergent sites files
BEPA_file <- file.path(nahr_dir, "BEPA-dup_divergent_sites.txt")
BLAU_file <- file.path(nahr_dir, "BLAU-dup_divergent_sites.txt")
CMCB_file <- file.path(nahr_dir, "CMCB-dup_divergent_sites.txt")
DNSE_file <- file.path(nahr_dir, "DNSE-dup_divergent_sites.txt")
ECHO_file <- file.path(nahr_dir, "ECHO-dup_divergent_sites.txt")
JADE_file <- file.path(nahr_dir, "JADE-dup_divergent_sites.txt")
KFSY_file <- file.path(nahr_dir, "KFSY-dup_divergent_sites.txt")
# Load the BEPA divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
BEPA_df <- read_tsv(BEPA_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `BEPA-dup2` == `BEPA-dup1` ~ "dup1",
      `BEPA-dup2` == `BEPA-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `BEPA-dup3` == `BEPA-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
BEPA_df
# Load the CMCB divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
CMCB_df <- read_tsv(CMCB_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `CMCB-dup2` == `CMCB-dup1` ~ "dup1",
      `CMCB-dup2` == `CMCB-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `CMCB-dup3` == `CMCB-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
CMCB_df
# Load the DNSE divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
DNSE_df <- read_tsv(DNSE_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `DNSE-dup2` == `DNSE-dup1` ~ "dup1",
      `DNSE-dup2` == `DNSE-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `DNSE-dup3` == `DNSE-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
DNSE_df
# Load the ECHO divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
ECHO_df <- read_tsv(ECHO_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `ECHO-dup2` == `ECHO-dup1` ~ "dup1",
      `ECHO-dup2` == `ECHO-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `ECHO-dup3` == `ECHO-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
ECHO_df
# Load the JADE divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
JADE_df <- read_tsv(JADE_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `JADE-dup2` == `JADE-dup1` ~ "dup1",
      `JADE-dup2` == `JADE-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `JADE-dup3` == `JADE-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
JADE_df
# Load the KFSY divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
KFSY_df <- read_tsv(KFSY_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `KFSY-dup2` == `KFSY-dup1` ~ "dup1",
      `KFSY-dup2` == `KFSY-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `KFSY-dup3` == `KFSY-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
KFSY_df
# Tile heatmap of divergent positions for a sample with three copies.
# `df` must provide the columns Position, dup and variant_type.
# Returns a ggplot: positions along x (labels on top), copies along y in
# reverse level order, tiles filled by variant_type, no legend.
div_pos_plot3 <- function(df) {
  tile_theme <- theme(
    axis.title.x = element_blank(),
    # axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    panel.grid = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    legend.position = "none"
  )

  ggplot(
    df,
    aes(x = factor(Position), y = reorder(dup, desc(dup)), fill = variant_type)
  ) +
    geom_tile(color = "white") +
    scale_fill_manual(values = c("#00674b", "#00d399")) +
    scale_x_discrete(position = "top") +
    theme_minimal(base_size = 7) +
    tile_theme
}
# Tile heatmap of divergent positions for a sample with four copies.
# `df` must provide the columns Position, dup and variant_type, where
# variant_type is one of "dup1", "dup4" or "dup2-3".
# Returns a ggplot: positions along x (labels on top), copies along y in
# reverse level order, tiles filled by variant_type, no legend.
div_pos_plot4 <- function(df) {
  # Name the palette explicitly. An unnamed vector is assigned in sorted level
  # order ("dup1", "dup2-3", "dup4"), which would colour the last copy grey
  # and the dup2/dup3-only class light green -- the reverse of the 3-copy and
  # all-sample plots, where the last copy is light green and the rest grey.
  variant_colours <- c(
    "dup1" = "#00674b",
    "dup4" = "#00d399",
    "dup2-3" = "grey"
  )

  tile_theme <- theme(
    axis.title.x = element_blank(),
    # axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    panel.grid = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    legend.position = "none"
  )

  ggplot(
    df,
    aes(x = factor(Position), y = reorder(dup, desc(dup)), fill = variant_type)
  ) +
    geom_tile(color = "white") +
    scale_fill_manual(values = variant_colours) +
    scale_x_discrete(position = "top") +
    theme_minimal(base_size = 7) +
    tile_theme
}

# CMCB heatmap, y axis titled with the sample name
CMCB_plot <- div_pos_plot3(CMCB_df) + labs(y = "CMCB")
CMCB_plot

# JADE heatmap, y axis titled with the sample name
JADE_plot <- div_pos_plot3(JADE_df) + labs(y = "JADE")
JADE_plot

# BLAU heatmap (four copies), y axis titled with the sample name
BLAU_plot <- div_pos_plot4(BLAU_df) + labs(y = "BLAU")
BLAU_plot

## Analysis using same position numbers for all alignments

# Read the combined no-gaps divergent sites file (all samples, shared position
# numbering). Its non-Position columns are named "<sample>-<dup>". Each copy
# is classified per position as "dup1" (matches dup1), "dupL" (presumably the
# last copy: dup3, or dup4 for BLAU) or "other".
all_nogaps_df <- read_tsv(all_nogaps_file) %>%
  # Long form: one row per position, sample and copy
  pivot_longer(
    cols = -Position,
    names_to = c("sample", "dup"),
    names_sep = "-"
  ) %>%
  # Back to one column per copy so copies can be compared within a row
  pivot_wider(names_from = dup, values_from = value) %>%
  mutate(
    dup1_type = case_when(!is.na(dup1) ~ "dup1"),
    dup2_type = case_when(
      dup2 == dup1 ~ "dup1",
      sample != "BLAU" & dup2 == dup3 ~ "dupL",
      sample == "BLAU" & dup2 == dup4 ~ "dupL",
      sample == "BLAU" & dup2 == dup3 ~ "other",
      is.na(dup2) ~ "other"
    ),
    dup3_type = case_when(
      dup3 == dup1 ~ "dup1",
      dup3 == dup4 ~ "dupL",
      sample == "BLAU" & dup3 == dup2 ~ "other",
      !is.na(dup3) ~ "dupL"
    ),
    dup4_type = case_when(
      dup4 == dup1 ~ "dup1",
      !is.na(dup4) ~ "dupL"
    )
  ) %>%
  pivot_longer(
    cols = ends_with("type"),
    names_to = "dup",
    values_to = "variant_type"
  ) %>%
  mutate(
    dup = str_remove(dup, "_type"),
    # fct_relevel() only accepts level names positionally and errors on a
    # named `levels =` argument; factor() sets the facet order directly.
    sample = factor(
      sample,
      levels = c("BEPA", "ECHO", "JADE", "CMCB", "DNSE", "KFSY", "BLAU")
    )
  ) %>%
  # Drop unclassified copies and positions where all copies of a sample agree
  filter(
    !is.na(variant_type),
    !(sample == "BLAU" & dup1 == dup2 & dup1 == dup3 & dup1 == dup4),
    !(sample != "BLAU" & dup1 == dup2 & dup1 == dup3)
  )
Error in `mutate()`:
ℹ In argument: `sample = fct_relevel(...)`.
Caused by error in `fct_relevel()`:
! Arguments in `...` must be passed by position, not name.
✖ Problematic argument:
• levels = c("BEPA", "ECHO", "JADE", "CMCB", "DNSE", "KFSY", "BLAU")
Run `rlang::last_trace()` to see where the error occurred.
# Tile heatmap of divergent positions, used for the all-sample table.
# `df` must provide the columns Position, dup and variant_type.
# Returns a ggplot: positions along x (labels on top), copies along y in
# reverse level order, tiles filled by variant_type (three colours), no legend.
div_pos_plot_all <- function(df) {
  tile_theme <- theme(
    axis.title.x = element_blank(),
    # axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    panel.grid = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    legend.position = "none"
  )

  ggplot(
    df,
    aes(x = factor(Position), y = reorder(dup, desc(dup)), fill = variant_type)
  ) +
    geom_tile(color = "white") +
    scale_fill_manual(values = c("#00674b", "#00d399", "grey")) +
    scale_x_discrete(position = "top") +
    theme_minimal(base_size = 6.5) +
    tile_theme
}

# Write the most recent plot to a 6.5 x 2.5 inch PDF
ggsave(
  filename = "/Volumes/lab_kingsley/ambenj/myosin_dups/analysis/nahr_analysis/shared_plot_allSamps_allPos_figure.pdf",
  width = 6.5,
  height = 2.5,
  units = "in"
)
# Since CMCB, DNSE, KFSY, and JADE results are the same, filter down to one representative.
# Keep only positions where dup1 differs from the last copy (dup4 for BLAU,
# dup3 otherwise), then draw one facet row per sample.
all_nogaps_df %>%
  filter(
    sample %in% c("BEPA", "ECHO", "CMCB", "BLAU"),
    (sample == "BLAU" & dup1 != dup4) | (sample != "BLAU" & dup1 != dup3)
  ) %>%
  div_pos_plot_all() +
  # `scales` spelled out in full; `scale =` only worked via partial matching
  facet_grid(rows = vars(sample), scales = "free_y", space = "free_y", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

# BEPA only: positions where BEPA dup1 and dup3 differ, on the shared position numbering
BEPA_shared_plot <- all_nogaps_df %>%
  filter(sample == "BEPA", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

BEPA_shared_plot

# CMCB only: positions where CMCB dup1 and dup3 differ, on the shared position numbering
CMCB_shared_plot <- all_nogaps_df %>%
  filter(sample == "CMCB", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

CMCB_shared_plot

# DNSE only: positions where DNSE dup1 and dup3 differ, on the shared position numbering
DNSE_shared_plot <- all_nogaps_df %>%
  filter(sample == "DNSE", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

DNSE_shared_plot

# ECHO only: positions where ECHO dup1 and dup3 differ, on the shared position numbering
ECHO_shared_plot <- all_nogaps_df %>%
  filter(sample == "ECHO", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

ECHO_shared_plot

# JADE only: positions where JADE dup1 and dup3 differ, on the shared position numbering
JADE_shared_plot <- all_nogaps_df %>%
  filter(sample == "JADE", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

JADE_shared_plot

# KFSY only: positions where KFSY dup1 and dup3 differ, on the shared position numbering
KFSY_shared_plot <- all_nogaps_df %>%
  filter(sample == "KFSY", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

KFSY_shared_plot

# BLAU only: positions where BLAU dup1 and dup4 differ, on the shared position numbering
BLAU_shared_plot <- all_nogaps_df %>%
  filter(sample == "BLAU", dup1 != dup4) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

BLAU_shared_plot

# Stack the four representative per-sample panels in a single column
shared_plot_subset <- plot_grid(
  BEPA_shared_plot,
  ECHO_shared_plot,
  CMCB_shared_plot,
  BLAU_shared_plot,
  ncol = 1
)
shared_plot_subset

# Save the stacked figure. The plot is passed explicitly so the output does
# not depend on whichever plot happened to be created or shown last.
ggsave(
  "/Volumes/lab_kingsley/ambenj/myosin_dups/analysis/nahr_analysis/shared_plot_subset_figure.pdf",
  plot = shared_plot_subset,
  width = 6.5,
  height = 2.2,
  units = "in"
)
---
title: "R Notebook"
output: html_notebook
---


```{r}
library(tidyverse)
library(cowplot)
```

```{r}
# Directory that holds every per-sample divergent sites table
nahr_dir <- "/Volumes/lab_kingsley/ambenj/myosin_dups/analysis/nahr_analysis"

# Per-sample divergent sites files
BEPA_file <- file.path(nahr_dir, "BEPA-dup_divergent_sites.txt")
BLAU_file <- file.path(nahr_dir, "BLAU-dup_divergent_sites.txt")
CMCB_file <- file.path(nahr_dir, "CMCB-dup_divergent_sites.txt")
DNSE_file <- file.path(nahr_dir, "DNSE-dup_divergent_sites.txt")
ECHO_file <- file.path(nahr_dir, "ECHO-dup_divergent_sites.txt")
JADE_file <- file.path(nahr_dir, "JADE-dup_divergent_sites.txt")
KFSY_file <- file.path(nahr_dir, "KFSY-dup_divergent_sites.txt")
```

```{r}
# Load the BEPA divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
BEPA_df <- read_tsv(BEPA_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `BEPA-dup2` == `BEPA-dup1` ~ "dup1",
      `BEPA-dup2` == `BEPA-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `BEPA-dup3` == `BEPA-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
BEPA_df
```

```{r}
# Load the BLAU divergent sites table (four copies) and label each copy, per
# position, by the copy whose value it matches. dup2 and dup3 are checked
# against dup1 first, then dup4, then each other ("dup2-3").
BLAU_df <- read_tsv(BLAU_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `BLAU-dup2` == `BLAU-dup1` ~ "dup1",
      `BLAU-dup2` == `BLAU-dup4` ~ "dup4",
      `BLAU-dup2` == `BLAU-dup3` ~ "dup2-3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `BLAU-dup3` == `BLAU-dup1` ~ "dup1",
      `BLAU-dup3` == `BLAU-dup4` ~ "dup4",
      `BLAU-dup3` == `BLAU-dup2` ~ "dup2-3",
      TRUE ~ "dup3_unique"
    ),
    dup4 = case_when(
      `BLAU-dup4` == `BLAU-dup1` ~ "dup1",
      TRUE ~ "dup4"
    )
  ) %>%
  # Drop positions where dup2 or dup3 matches no other copy
  filter(
    dup2 != "dup2_unique",
    dup3 != "dup3_unique"
  ) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
BLAU_df
```

```{r}
# Load the CMCB divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
CMCB_df <- read_tsv(CMCB_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `CMCB-dup2` == `CMCB-dup1` ~ "dup1",
      `CMCB-dup2` == `CMCB-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `CMCB-dup3` == `CMCB-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
CMCB_df
```

```{r}
# Load the DNSE divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
DNSE_df <- read_tsv(DNSE_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `DNSE-dup2` == `DNSE-dup1` ~ "dup1",
      `DNSE-dup2` == `DNSE-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `DNSE-dup3` == `DNSE-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
DNSE_df
```

```{r}
# Load the ECHO divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
ECHO_df <- read_tsv(ECHO_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `ECHO-dup2` == `ECHO-dup1` ~ "dup1",
      `ECHO-dup2` == `ECHO-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `ECHO-dup3` == `ECHO-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
ECHO_df
```

```{r}
# Load the JADE divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
JADE_df <- read_tsv(JADE_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `JADE-dup2` == `JADE-dup1` ~ "dup1",
      `JADE-dup2` == `JADE-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `JADE-dup3` == `JADE-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
JADE_df
```

```{r}
# Load the KFSY divergent sites table and label each copy, per position, by
# the copy whose value it matches (dup1 is always labelled "dup1").
KFSY_df <- read_tsv(KFSY_file) %>%
  mutate(
    dup1 = "dup1",
    dup2 = case_when(
      `KFSY-dup2` == `KFSY-dup1` ~ "dup1",
      `KFSY-dup2` == `KFSY-dup3` ~ "dup3",
      TRUE ~ "dup2_unique"
    ),
    dup3 = case_when(
      `KFSY-dup3` == `KFSY-dup1` ~ "dup1",
      TRUE ~ "dup3"
    )
  ) %>%
  # Drop positions where dup2 matches neither dup1 nor dup3
  filter(dup2 %in% c("dup1", "dup3")) %>%
  # Long form: one row per position and copy label column
  pivot_longer(
    cols = starts_with("dup"),
    names_to = "dup",
    values_to = "variant_type"
  )
KFSY_df
```

```{r}
# Tile heatmap of divergent positions for a sample with three copies.
# `df` must provide the columns Position, dup and variant_type.
# Returns a ggplot: positions along x (labels on top), copies along y in
# reverse level order, tiles filled by variant_type, no legend.
div_pos_plot3 <- function(df) {
  tile_theme <- theme(
    axis.title.x = element_blank(),
    # axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    panel.grid = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    legend.position = "none"
  )

  ggplot(
    df,
    aes(x = factor(Position), y = reorder(dup, desc(dup)), fill = variant_type)
  ) +
    geom_tile(color = "white") +
    scale_fill_manual(values = c("#00674b", "#00d399")) +
    scale_x_discrete(position = "top") +
    theme_minimal(base_size = 7) +
    tile_theme
}
```

```{r}
# Tile heatmap of divergent positions for a sample with four copies.
# `df` must provide the columns Position, dup and variant_type, where
# variant_type is one of "dup1", "dup4" or "dup2-3".
# Returns a ggplot: positions along x (labels on top), copies along y in
# reverse level order, tiles filled by variant_type, no legend.
div_pos_plot4 <- function(df) {
  # Name the palette explicitly. An unnamed vector is assigned in sorted level
  # order ("dup1", "dup2-3", "dup4"), which would colour the last copy grey
  # and the dup2/dup3-only class light green -- the reverse of the 3-copy and
  # all-sample plots, where the last copy is light green and the rest grey.
  variant_colours <- c(
    "dup1" = "#00674b",
    "dup4" = "#00d399",
    "dup2-3" = "grey"
  )

  tile_theme <- theme(
    axis.title.x = element_blank(),
    # axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    panel.grid = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    legend.position = "none"
  )

  ggplot(
    df,
    aes(x = factor(Position), y = reorder(dup, desc(dup)), fill = variant_type)
  ) +
    geom_tile(color = "white") +
    scale_fill_manual(values = variant_colours) +
    scale_x_discrete(position = "top") +
    theme_minimal(base_size = 7) +
    tile_theme
}
```

```{r fig.height=1, fig.width=10}
# BEPA heatmap, y axis titled with the sample name
BEPA_plot <- div_pos_plot3(BEPA_df) + labs(y = "BEPA")
BEPA_plot
```

```{r fig.height=1, fig.width=10}
# CMCB heatmap, y axis titled with the sample name
CMCB_plot <- div_pos_plot3(CMCB_df) + labs(y = "CMCB")
CMCB_plot
```

```{r fig.height=1, fig.width=10}
# DNSE heatmap, y axis titled with the sample name
DNSE_plot <- div_pos_plot3(DNSE_df) + labs(y = "DNSE")
DNSE_plot
```


```{r fig.height=1, fig.width=10}
# ECHO heatmap, y axis titled with the sample name
ECHO_plot <- div_pos_plot3(ECHO_df) + labs(y = "ECHO")
ECHO_plot
```

```{r fig.height=1, fig.width=10}
# JADE heatmap, y axis titled with the sample name
JADE_plot <- div_pos_plot3(JADE_df) + labs(y = "JADE")
JADE_plot
```

```{r fig.height=1, fig.width=10}
# KFSY heatmap, y axis titled with the sample name
KFSY_plot <- div_pos_plot3(KFSY_df) + labs(y = "KFSY")
KFSY_plot
```

```{r fig.height=1, fig.width=10}
# BLAU heatmap (four copies), y axis titled with the sample name
BLAU_plot <- div_pos_plot4(BLAU_df) + labs(y = "BLAU")
BLAU_plot
```

```{r}
# Stack all seven per-sample heatmaps in a single column
plot_grid(
  plotlist = list(
    BEPA_plot,
    ECHO_plot,
    JADE_plot,
    CMCB_plot,
    DNSE_plot,
    KFSY_plot,
    BLAU_plot
  ),
  ncol = 1
)
```

```{r}
# Stack the four representative per-sample heatmaps in a single column
plot_grid(
  plotlist = list(BEPA_plot, ECHO_plot, CMCB_plot, BLAU_plot),
  ncol = 1
)
```

## Analysis using same position numbers for all alignments
```{r}
# Combined divergent sites tables covering all samples (with and without gaps)
nahr_dir <- "/Volumes/lab_kingsley/ambenj/myosin_dups/analysis/nahr_analysis"
all_file <- file.path(nahr_dir, "5-6-copy_dup_divergent_sites.txt")
all_nogaps_file <- file.path(nahr_dir, "5-6-copy_dup_nogaps_divergent_sites.txt")
```

```{r}
# Read the combined no-gaps divergent sites file (all samples, shared position
# numbering). Its non-Position columns are named "<sample>-<dup>". Each copy
# is classified per position as "dup1" (matches dup1), "dupL" (presumably the
# last copy: dup3, or dup4 for BLAU) or "other".
all_nogaps_df <- read_tsv(all_nogaps_file) %>%
  # Long form: one row per position, sample and copy
  pivot_longer(
    cols = -Position,
    names_to = c("sample", "dup"),
    names_sep = "-"
  ) %>%
  # Back to one column per copy so copies can be compared within a row
  pivot_wider(names_from = dup, values_from = value) %>%
  mutate(
    dup1_type = case_when(!is.na(dup1) ~ "dup1"),
    dup2_type = case_when(
      dup2 == dup1 ~ "dup1",
      sample != "BLAU" & dup2 == dup3 ~ "dupL",
      sample == "BLAU" & dup2 == dup4 ~ "dupL",
      sample == "BLAU" & dup2 == dup3 ~ "other",
      is.na(dup2) ~ "other"
    ),
    dup3_type = case_when(
      dup3 == dup1 ~ "dup1",
      dup3 == dup4 ~ "dupL",
      sample == "BLAU" & dup3 == dup2 ~ "other",
      !is.na(dup3) ~ "dupL"
    ),
    dup4_type = case_when(
      dup4 == dup1 ~ "dup1",
      !is.na(dup4) ~ "dupL"
    )
  ) %>%
  pivot_longer(
    cols = ends_with("type"),
    names_to = "dup",
    values_to = "variant_type"
  ) %>%
  mutate(
    dup = str_remove(dup, "_type"),
    # Fixed sample order, used for the facet rows
    sample = factor(
      sample,
      levels = c("BEPA", "ECHO", "JADE", "CMCB", "DNSE", "KFSY", "BLAU")
    )
  ) %>%
  # Drop unclassified copies and positions where all copies of a sample agree
  filter(
    !is.na(variant_type),
    !(sample == "BLAU" & dup1 == dup2 & dup1 == dup3 & dup1 == dup4),
    !(sample != "BLAU" & dup1 == dup2 & dup1 == dup3)
  )
```
```{r}
# Tile heatmap of divergent positions, used for the all-sample table.
# `df` must provide the columns Position, dup and variant_type.
# Returns a ggplot: positions along x (labels on top), copies along y in
# reverse level order, tiles filled by variant_type (three colours), no legend.
div_pos_plot_all <- function(df) {
  tile_theme <- theme(
    axis.title.x = element_blank(),
    # axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
    panel.grid = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    legend.position = "none"
  )

  ggplot(
    df,
    aes(x = factor(Position), y = reorder(dup, desc(dup)), fill = variant_type)
  ) +
    geom_tile(color = "white") +
    scale_fill_manual(values = c("#00674b", "#00d399", "grey")) +
    scale_x_discrete(position = "top") +
    theme_minimal(base_size = 6.5) +
    tile_theme
}
```

```{r}
# Show the classified table
all_nogaps_df

# All samples: keep positions where dup1 differs from the last copy (dup4 for
# BLAU, dup3 otherwise) and draw one tightly spaced facet row per sample.
all_nogaps_df %>%
  filter((sample == "BLAU" & dup1 != dup4) | (sample != "BLAU" & dup1 != dup3)) %>%
  div_pos_plot_all() +
  # `scales` spelled out in full; `scale =` only worked via partial matching
  facet_grid(rows = vars(sample), scales = "free_y", space = "free_y", switch = "y") +
  theme(
    strip.placement = "outside",
    axis.title.y = element_blank(),
    panel.spacing.y = unit(0.1, "lines")
  )
```
```{r}
# Write the most recent plot to a 6.5 x 2.5 inch PDF
ggsave(
  filename = "/Volumes/lab_kingsley/ambenj/myosin_dups/analysis/nahr_analysis/shared_plot_allSamps_allPos_figure.pdf",
  width = 6.5,
  height = 2.5,
  units = "in"
)
```


```{r}
# Since CMCB, DNSE, KFSY, and JADE results are the same, filter down to one representative.
# Keep only positions where dup1 differs from the last copy (dup4 for BLAU,
# dup3 otherwise), then draw one facet row per sample.
all_nogaps_df %>%
  filter(
    sample %in% c("BEPA", "ECHO", "CMCB", "BLAU"),
    (sample == "BLAU" & dup1 != dup4) | (sample != "BLAU" & dup1 != dup3)
  ) %>%
  div_pos_plot_all() +
  # `scales` spelled out in full; `scale =` only worked via partial matching
  facet_grid(rows = vars(sample), scales = "free_y", space = "free_y", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())
```
```{r}
# BEPA only: positions where BEPA dup1 and dup3 differ, on the shared position numbering
BEPA_shared_plot <- all_nogaps_df %>%
  filter(sample == "BEPA", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

BEPA_shared_plot
```
```{r}
# CMCB only: positions where CMCB dup1 and dup3 differ, on the shared position numbering
CMCB_shared_plot <- all_nogaps_df %>%
  filter(sample == "CMCB", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

CMCB_shared_plot
```
```{r}
# DNSE only: positions where DNSE dup1 and dup3 differ, on the shared position numbering
DNSE_shared_plot <- all_nogaps_df %>%
  filter(sample == "DNSE", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

DNSE_shared_plot
```
```{r}
# ECHO only: positions where ECHO dup1 and dup3 differ, on the shared position numbering
ECHO_shared_plot <- all_nogaps_df %>%
  filter(sample == "ECHO", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

ECHO_shared_plot
```
```{r}
# JADE only: positions where JADE dup1 and dup3 differ, on the shared position numbering
JADE_shared_plot <- all_nogaps_df %>%
  filter(sample == "JADE", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

JADE_shared_plot
```

```{r}
# KFSY only: positions where KFSY dup1 and dup3 differ, on the shared position numbering
KFSY_shared_plot <- all_nogaps_df %>%
  filter(sample == "KFSY", dup1 != dup3) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

KFSY_shared_plot
```

```{r}
# BLAU only: positions where BLAU dup1 and dup4 differ, on the shared position numbering
BLAU_shared_plot <- all_nogaps_df %>%
  filter(sample == "BLAU", dup1 != dup4) %>%
  div_pos_plot_all() +
  facet_grid(rows = vars(sample), scales = "free", space = "free", switch = "y") +
  theme(strip.placement = "outside", axis.title.y = element_blank())

BLAU_shared_plot
```

```{r}
# Stack the four representative per-sample panels in a single column
shared_plot_subset <- plot_grid(
  plotlist = list(
    BEPA_shared_plot,
    ECHO_shared_plot,
    CMCB_shared_plot,
    BLAU_shared_plot
  ),
  ncol = 1
)
shared_plot_subset
```
```{r}
# Save the stacked subset figure as a 6.5 x 2.2 inch PDF. The plot is passed
# explicitly so the output does not depend on which chunk ran last.
ggsave(
  "/Volumes/lab_kingsley/ambenj/myosin_dups/analysis/nahr_analysis/shared_plot_subset_figure.pdf",
  plot = shared_plot_subset,
  width = 6.5,
  height = 2.2,
  units = "in"
)
```

